In [1]:
# Integrate three LSE scRNA-seq data
# 04-17-2025
# used scRNA-seq data: GSE263932, GSE163121, GSE136035
In [89]:
library(Seurat)
library(ggplot2)
library(dplyr)
options(repr.plot.width=10, repr.plot.height=8)
library(SeuratWrappers)
library(harmony)
library(clusterProfiler)
library(enrichplot)
library(msigdbr)
library(stringr)
set.seed(2025)
In [3]:
# GSE263932
in_path <- "~/projects/2024/RA/GSE263932/"
setwd(in_path)
f_barcodes <- dir(pattern = "*barcodes.tsv.gz")
d_name <- character()
for (i in 1:length(f_barcodes)) {
# f_features[i] <- paste0(substr(f_barcodes[i], 1, (nchar(f_barcodes[i])-15)),"features.tsv.gz")
# f_matrix[i] <- paste0(substr(f_barcodes[i], 1, (nchar(f_barcodes[i])-15)),"matrix.mtx.gz")
d_name[i] <- substr(f_barcodes[i], 1, 10)
# if (!file.exists(paste0(in_path,d_name[i],"/barcodes.tsv.gz"))) {
# dir.create(paste0(in_path,d_name[i]))
# file.copy(paste0(in_path,f_barcodes[i]), paste0(in_path,d_name[i],"/barcodes.tsv.gz"))
# file.copy(paste0(in_path,f_features[i]), paste0(in_path,d_name[i],"/features.tsv.gz"))
# file.copy(paste0(in_path,f_matrix[i]), paste0(in_path,d_name[i],"/matrix.mtx.gz"))
# }
}
scdata <- list()
for (i in 1:length(f_barcodes)) {
data <- Read10X(data.dir = paste0(in_path,d_name[i]))
scdata[[i]] <- CreateSeuratObject(counts = data, project = d_name[i])
}
# print(scdata)
d_name_all <- d_name
d_name_1 <- d_name
l <- length(scdata)
l
10
In [4]:
# GSE163121
in_path <- "~/projects/2024/RA/GSE163121_RAW/"
d_name <- c('GSM4972213', 'GSM4972214', 'GSM4972215', 'GSM4972216', 'GSM4972217')
for (i in 1:length(d_name)) {
ii <- i + l
data <- Read10X(data.dir = paste0(in_path,d_name[i]))
scdata[[ii]] <- CreateSeuratObject(counts = data, project = d_name[i])
}
# print(scdata)
l <- length(scdata)
d_name_all <- c(d_name_all,d_name)
d_name_2 <- d_name
l
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
15
In [5]:
# GSE136035
in_path <- "~/projects/2024/RA/GSE136035_RAW/"
f_barcodes <- dir(path = in_path, pattern = "*barcodes.tsv.gz")
d_name <- character()
f_features <- character()
f_matrix <- character()
for (i in 1:length(f_barcodes)) {
d_name[i] <- substr(f_barcodes[i], 1, 10)
f_features[i] <- paste0(substr(f_barcodes[i], 1, (nchar(f_barcodes[i])-15)),"genes.tsv.gz")
f_matrix[i] <- paste0(substr(f_barcodes[i], 1, (nchar(f_barcodes[i])-15)),"matrix.mtx.gz")
if (!file.exists(paste0(in_path,d_name[i],"/barcodes.tsv.gz"))) {
dir.create(paste0(in_path,d_name[i]))
file.copy(paste0(in_path,f_barcodes[i]), paste0(in_path,d_name[i],"/barcodes.tsv.gz"))
file.copy(paste0(in_path,f_features[i]), paste0(in_path,d_name[i],"/features.tsv.gz"))
file.copy(paste0(in_path,f_matrix[i]), paste0(in_path,d_name[i],"/matrix.mtx.gz"))
}
}
for (i in 1:length(f_barcodes)) {
ii <- i + l
data <- Read10X(data.dir = paste0(in_path,d_name[i]))
if ("Gene Expression" %in% names(data)) {
scdata[[ii]] <- CreateSeuratObject(counts = data$`Gene Expression`, project = d_name[i])
} else {
scdata[[ii]] <- CreateSeuratObject(counts = data, project = d_name[i])
}
}
# print(scdata)
l <- length(scdata)
d_name_all <- c(d_name_all,d_name)
d_name_3 <- d_name
l
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
Warning message:
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
10X data contains more than one type and is being returned as a list containing matrices of each type.
10X data contains more than one type and is being returned as a list containing matrices of each type.
23
In [6]:
Sobj <- merge(scdata[[1]], y = scdata[-1], add.cell.ids = d_name_all)
In [7]:
Sobj[["percent.mt"]] <- PercentageFeatureSet(Sobj, pattern = "^MT-")
In [8]:
p1 <- FeatureScatter(Sobj, feature1 = "nCount_RNA", feature2 = "percent.mt", raster = F)
p2 <- FeatureScatter(Sobj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", raster = F)
p1
p2
In [9]:
Sobj <- subset(Sobj, subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt <20)
# Sobj <- JoinLayers(Sobj)
Sobj
An object of class Seurat 49632 features across 133389 samples within 1 assay Active assay: RNA (49632 features, 0 variable features) 23 layers present: counts.GSM8207627, counts.GSM8207628, counts.GSM8207629, counts.GSM8207630, counts.GSM8207631, counts.GSM8207632, counts.GSM8207633, counts.GSM8207634, counts.GSM8207635, counts.GSM8207636, counts.GSM4972213, counts.GSM4972214, counts.GSM4972215, counts.GSM4972216, counts.GSM4972217, counts.GSM4039805, counts.GSM4039807, counts.GSM4039809, counts.GSM4039813, counts.GSM4039815, counts.GSM4039819, counts.GSM4885423, counts.GSM4885425
In [10]:
Idents(Sobj) <- Sobj$orig.ident
rename_map <- c(
setNames(rep("GSE263932", length(d_name_1)), d_name_1),
setNames(rep("GSE163121", length(d_name_2)), d_name_2),
setNames(rep("GSE136035", length(d_name_3)), d_name_3)
)
# Rename
Sobj <- RenameIdents(Sobj, rename_map)
unique(Idents(Sobj))
Sobj$batch <- Idents(Sobj)
- GSE263932
- GSE163121
- GSE136035
Levels:
- 'GSE263932'
- 'GSE163121'
- 'GSE136035'
In [11]:
Sobj <- NormalizeData(Sobj, normalization.method = "LogNormalize", scale.factor = 10000)
Sobj <- FindVariableFeatures(Sobj, selection.method = "vst", nfatures = 2000)
Sobj <- ScaleData(Sobj)
Sobj <- RunPCA(Sobj)
Normalizing layer: counts.GSM8207627 Normalizing layer: counts.GSM8207628 Normalizing layer: counts.GSM8207629 Normalizing layer: counts.GSM8207630 Normalizing layer: counts.GSM8207631 Normalizing layer: counts.GSM8207632 Normalizing layer: counts.GSM8207633 Normalizing layer: counts.GSM8207634 Normalizing layer: counts.GSM8207635 Normalizing layer: counts.GSM8207636 Normalizing layer: counts.GSM4972213 Normalizing layer: counts.GSM4972214 Normalizing layer: counts.GSM4972215 Normalizing layer: counts.GSM4972216 Normalizing layer: counts.GSM4972217 Normalizing layer: counts.GSM4039805 Normalizing layer: counts.GSM4039807 Normalizing layer: counts.GSM4039809 Normalizing layer: counts.GSM4039813 Normalizing layer: counts.GSM4039815 Normalizing layer: counts.GSM4039819 Normalizing layer: counts.GSM4885423 Normalizing layer: counts.GSM4885425 Finding variable features for layer counts.GSM8207627 Finding variable features for layer counts.GSM8207628 Finding variable features for layer counts.GSM8207629 Finding variable features for layer counts.GSM8207630 Finding variable features for layer counts.GSM8207631 Finding variable features for layer counts.GSM8207632 Finding variable features for layer counts.GSM8207633 Finding variable features for layer counts.GSM8207634 Finding variable features for layer counts.GSM8207635 Finding variable features for layer counts.GSM8207636 Finding variable features for layer counts.GSM4972213 Finding variable features for layer counts.GSM4972214 Finding variable features for layer counts.GSM4972215 Finding variable features for layer counts.GSM4972216 Finding variable features for layer counts.GSM4972217 Finding variable features for layer counts.GSM4039805 Finding variable features for layer counts.GSM4039807 Finding variable features for layer counts.GSM4039809 Finding variable features for layer counts.GSM4039813 Finding variable features for layer counts.GSM4039815 Finding variable features for layer counts.GSM4039819 Finding variable features for layer counts.GSM4885423 Finding variable features for layer counts.GSM4885425 Centering and scaling data matrix PC_ 1 Positive: CD79A, CD79B, HLA-DQA1, TCL1A, IGHM, LTB, HLA-DPB1, CD74, HLA-DRA, VPREB3 HLA-DPA1, RPLP0, IGHD, HLA-DRB1, CD69, SPIB, RPS8, FCER2, CD72, CD24 RPS12, TSPAN13, CXXC5, ISG20, PDLIM1, POU2AF1, MALAT1, PLPP5, EAF2, EEF1A1 Negative: TYROBP, CST3, LYZ, S100A9, FCN1, S100A11, S100A8, S100A4, IFITM3, FCER1G AIF1, S100A6, S100A10, SERPINA1, CFD, LGALS1, VCAN, IFI30, CD68, SRGN CEBPD, LGALS3, TSPO, TYMP, APOBEC3A, CD14, LST1, MNDA, TIMP1, ANXA1 PC_ 2 Positive: IL32, CD3E, CD3D, IL7R, CD7, CD3G, LCK, CD2, CD247, GIMAP7 LAT, MAL, TCF7, GZMM, LINC00861, ITM2A, ZAP70, KLRB1, CAMK4, CD6 LEPROTL1, AQP3, TRAT1, CTSW, TC2N, TRBC1, INPP4B, PRKCH, CD27, FLT3LG Negative: CD74, HLA-DRA, HLA-DRB1, HLA-DPA1, HLA-DPB1, CD79A, HLA-DQA1, CD79B, TCL1A, IGHM VPREB3, SPI1, CYBB, SPIB, PLD4, CTSZ, CTSS, CD72, CXXC5, IGHD MZB1, PPP1R14A, YBX3, ITM2C, CAPG, CD24, JUP, EAF2, GNAI2, HCK PC_ 3 Positive: PPIB, HSPA8, AQP3, LDHB, MIF, IFITM1, SUB1, TNFRSF17, ACTG1, TMEM258 FKBP11, SEC61G, SSR4, SEC11C, CD27, TMSB10, B2M, SPCS1, PEBP1, DAD1 JCHAIN, IL32, MZB1, HSP90B1, ISG20, PRDX2, COX5A, PSME2, ITM2C, NOSIP Negative: FOS, S100A9, CD83, CST3, LYZ, FTH1, FCN1, S100A8, NEAT1, MALAT1 FOSB, MT-CO1, NR4A2, VCAN, DUSP1, TYROBP, ADAM28, CDKN1A, IFI30, APOBEC3A SERPINA1, CFD, NR4A1, AIF1, CD14, MNDA, KLF4, LGALS2, JUN, CD74 PC_ 4 Positive: TCL1A, ACTB, CD79B, LTB, PLD4, IFITM2, VPREB3, PFN1, GNAI2, MX1 PCDH9, IFI44L, HIST1H1E, SPIB, IFITM1, H1FX, ARPC1B, JUP, IGLC1, CYBB ACTG1, IFI30, TMSB10, AEBP1, CTSZ, YBX3, CD72, CD24, TSPAN13, LIMS2 Negative: NR4A2, CD83, ADAM28, NR4A1, HERPUD1, MAP3K8, CDKN1A, DUSP2, PELI1, LMNA RHOB, MTRNR2L12, ZFP36, NR4A3, DDIT4, SAT1, RGS2, JUNB, ATF3, IER2 BCL2A1, TRAF4, RGS1, SLC2A3, PIM3, FTH1, NFKBIA, IGHA1, NFKBID, ID3 PC_ 5 Positive: NKG7, GNLY, GZMB, CST7, PRF1, GZMA, KLRD1, SPON2, FGFBP2, CCL5 GZMH, CLIC3, CTSW, KLRF1, ADGRG1, IL2RB, S1PR5, MATK, SH2D1B, TBX21 HOPX, IGFBP7, TTC38, CCL4, PRSS23, C1orf21, FCGR3A, KLRC1, PTGDR, CX3CR1 Negative: MAL, RPS12, EEF1A1, IL7R, LTB, RPS8, LDHB, RPL10, CD3D, RPLP1 CD3E, AQP3, RPS18, PASK, RPLP0, NOSIP, TRAT1, LEPROTL1, FLT3LG, INPP4B CD3G, CD5, CD40LG, CD27, TCF7, CAMK4, VIM, RGS10, TMSB10, SIRPG
In [12]:
ElbowPlot(Sobj)
In [13]:
DimPlot(Sobj, group.by = "orig.ident")
DimPlot(Sobj, group.by = "batch")
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
In [14]:
Sobj <- RunUMAP(Sobj, dims = 1:15, reduction = "pca", reduction.name = "umap.unintegrated")
p0 <- DimPlot(Sobj, reduction = "umap.unintegrated", group.by = c("orig.ident", "Batch"))
Warning message: “The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation' This message will be shown once per session” 21:29:45 UMAP embedding parameters a = 0.9922 b = 1.112 21:29:45 Read 133389 rows and found 15 numeric columns 21:29:45 Using Annoy for neighbor search, n_neighbors = 30 21:29:45 Building Annoy index with metric = cosine, n_trees = 50 0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | 21:30:01 Writing NN index file to temp file /tmp/RtmpJjUFIM/file2dbbc84c5e2f4b 21:30:01 Searching Annoy index using 1 thread, search_k = 3000 21:30:53 Annoy recall = 100% 21:30:54 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30 21:30:59 Initializing from normalized Laplacian + noise (using RSpectra) 21:31:16 Commencing optimization for 200 epochs, with 5814612 positive edges 21:31:16 Using rng type: pcg 21:32:46 Optimization finished Warning message: “The following requested variables were not found: Batch” Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
In [15]:
# Sobj <- IntegrateLayers(
# object = Sobj, method = HarmonyIntegration,
# orig.reduction = "pca", new.reduction = "harmony",
# verbose = FALSE
# )
Sobj <- RunHarmony(Sobj,"orig.ident", plot_convergence = T, theta = 3)
harmony_embeddings <- Embeddings(Sobj, "harmony")
harmony_embeddings[1:5, 1:5]
Transposing data matrix Initializing state using k-means centroids initialization Harmony 1/10 Harmony 2/10 Harmony 3/10 Harmony 4/10 Harmony converged after 4 iterations
| harmony_1 | harmony_2 | harmony_3 | harmony_4 | harmony_5 | |
|---|---|---|---|---|---|
| GSM8207627_AAACCTGAGACCTTTG-1 | -1.0830656 | 1.279810 | -4.343171 | -0.4067075 | 0.05262298 |
| GSM8207627_AAACCTGAGATCCTGT-1 | -2.0226379 | 11.354504 | 1.194501 | 1.9259544 | -3.26795228 |
| GSM8207627_AAACCTGAGATGTAAC-1 | -21.3149790 | -9.468534 | -2.093603 | -1.9698764 | -1.11391355 |
| GSM8207627_AAACCTGAGCACCGTC-1 | 0.2204256 | 4.063539 | -5.372365 | -0.8389147 | 0.79834396 |
| GSM8207627_AAACCTGAGGCGATAC-1 | -21.1654703 | -7.153838 | -2.402306 | -4.0988843 | -1.58158235 |
In [16]:
DimPlot(Sobj, reduction = "harmony", group.by = c("orig.ident", "batch"))
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
In [17]:
Sobj <- RunUMAP(Sobj, dims = 1:15, reduction = "harmony")
21:35:30 UMAP embedding parameters a = 0.9922 b = 1.112 21:35:30 Read 133389 rows and found 15 numeric columns 21:35:30 Using Annoy for neighbor search, n_neighbors = 30 21:35:30 Building Annoy index with metric = cosine, n_trees = 50 0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | 21:35:46 Writing NN index file to temp file /tmp/RtmpJjUFIM/file2dbbc8393d8972 21:35:46 Searching Annoy index using 1 thread, search_k = 3000 21:36:44 Annoy recall = 100% 21:36:45 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30 21:36:50 Initializing from normalized Laplacian + noise (using RSpectra) 21:37:04 Commencing optimization for 200 epochs, with 5832214 positive edges 21:37:04 Using rng type: pcg 21:38:33 Optimization finished
In [18]:
DimPlot(Sobj, reduction = "umap")
DimPlot(Sobj, reduction = "umap", group.by = c("orig.ident", "batch"))
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
In [19]:
p1 <- DimPlot(Sobj, reduction = "umap", group.by = "orig.ident")
p2 <- DimPlot(Sobj, reduction = "umap", group.by = "batch")
p1
p2
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
In [20]:
Idents(Sobj) <- Sobj$batch
A <- subset(Sobj, ident = "GSE263932")
p3 <- DimPlot(A, reduction = "umap", group.by = "orig.ident")
B <- subset(Sobj, ident = "GSE163121")
p4 <- DimPlot(B, reduction = "umap", group.by = "orig.ident")
C <- subset(Sobj, ident = "GSE136035")
p5 <- DimPlot(C, reduction = "umap", group.by = "orig.ident")
p3
p4
p5
In [21]:
Sobj
An object of class Seurat 49632 features across 133389 samples within 1 assay Active assay: RNA (49632 features, 2000 variable features) 47 layers present: counts.GSM8207627, counts.GSM8207628, counts.GSM8207629, counts.GSM8207630, counts.GSM8207631, counts.GSM8207632, counts.GSM8207633, counts.GSM8207634, counts.GSM8207635, counts.GSM8207636, counts.GSM4972213, counts.GSM4972214, counts.GSM4972215, counts.GSM4972216, counts.GSM4972217, counts.GSM4039805, counts.GSM4039807, counts.GSM4039809, counts.GSM4039813, counts.GSM4039815, counts.GSM4039819, counts.GSM4885423, counts.GSM4885425, data.GSM8207627, data.GSM8207628, data.GSM8207629, data.GSM8207630, data.GSM8207631, data.GSM8207632, data.GSM8207633, data.GSM8207634, data.GSM8207635, data.GSM8207636, data.GSM4972213, data.GSM4972214, data.GSM4972215, data.GSM4972216, data.GSM4972217, data.GSM4039805, data.GSM4039807, data.GSM4039809, data.GSM4039813, data.GSM4039815, data.GSM4039819, data.GSM4885423, data.GSM4885425, scale.data 4 dimensional reductions calculated: pca, umap.unintegrated, harmony, umap
In [22]:
Sobj <- FindNeighbors(Sobj, dims = 1:15, reduction = "harmony")
Sobj <- FindClusters(Sobj, reduction = "umap", resolution = c(0.5,1,1.5))
Computing nearest neighbor graph Computing SNN Warning message: “The following arguments are not used: reduction” Warning message: “The following arguments are not used: reduction”
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 133389 Number of edges: 3302861 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.9029 Number of communities: 17 Elapsed time: 53 seconds
1 singletons identified. 16 final clusters.
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 133389 Number of edges: 3302861 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.8543 Number of communities: 28 Elapsed time: 52 seconds
1 singletons identified. 27 final clusters.
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck Number of nodes: 133389 Number of edges: 3302861 Running Louvain algorithm... Maximum modularity in 10 random starts: 0.8263 Number of communities: 36 Elapsed time: 58 seconds
1 singletons identified. 35 final clusters.
In [23]:
Sobj@reductions
# DimPlot(Sobj, reduction = "umap", group.by = )
$pca A dimensional reduction object with key PC_ Number of dimensions: 50 Number of cells: 133389 Projected dimensional reduction calculated: FALSE Jackstraw run: FALSE Computed using assay: RNA $umap.unintegrated A dimensional reduction object with key umapunintegrated_ Number of dimensions: 2 Number of cells: 133389 Projected dimensional reduction calculated: FALSE Jackstraw run: FALSE Computed using assay: RNA $harmony A dimensional reduction object with key harmony_ Number of dimensions: 50 Number of cells: 133389 Projected dimensional reduction calculated: TRUE Jackstraw run: FALSE Computed using assay: RNA $umap A dimensional reduction object with key umap_ Number of dimensions: 2 Number of cells: 133389 Projected dimensional reduction calculated: FALSE Jackstraw run: FALSE Computed using assay: RNA
In [24]:
# Sobj$`RNA_snn_res.1`
p6 <- DimPlot(Sobj, reduction = "umap", group.by = "RNA_snn_res.1")
p7 <- DimPlot(Sobj, reduction = "umap", group.by = "RNA_snn_res.1.5")
p8 <- DimPlot(Sobj, reduction = "umap", group.by = "RNA_snn_res.0.5")
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
In [25]:
Sobj <- JoinLayers(Sobj)
In [26]:
Idents(Sobj) <- Sobj$RNA_snn_res.1
Sobj <- SetIdent(Sobj, value = factor(Idents(Sobj), levels = c(0:26)))
levels(Idents(Sobj))
if (!file.exists("~/projects/2024/RA/Integrated_B_cells_wilcox.rds")) {
All.markers <- FindAllMarkers(Sobj, test.use = "wilcox", min.pct = 0.1, logfc.threshold = 0.58, verbose = F, only.pos = T)
saveRDS(All.markers, file = "~/projects/2024/RA/Integrated_B_cells_wilcox.rds")
} else {
All.markers <- readRDS("~/projects/2024/RA/Integrated_B_cells_wilcox.rds")
}
- '0'
- '1'
- '2'
- '3'
- '4'
- '5'
- '6'
- '7'
- '8'
- '9'
- '10'
- '11'
- '12'
- '13'
- '14'
- '15'
- '16'
- '17'
- '18'
- '19'
- '20'
- '21'
- '22'
- '23'
- '24'
- '25'
- '26'
In [27]:
top_5 <- All.markers %>%
group_by(cluster) %>%
filter(avg_log2FC > 0.58) %>%
slice_head(n = 5) %>%
ungroup()
DoHeatmap(Sobj, features = top_5$gene) + NoLegend()
Warning message in DoHeatmap(Sobj, features = top_5$gene): “The following features were omitted as they were not found in the scale.data slot for the RNA assay: CAVIN2, PRKCQ-AS1, LINC02446, ATP5G2, GLTSCR2, ATP5E, GNB2L1, AC090498.1, MT-ATP6, MT-ATP8, MT-ND2, MT-ND4, XIST, CXCR4, EZR, BANK1, BLK, MS4A1”
In [43]:
top_200 <- All.markers %>%
group_by(cluster) %>%
filter(avg_log2FC > 0.58) %>%
filter(p_val_adj < 0.05) %>%
slice_head(n = 200) %>%
ungroup()
write.csv(top_200, "~/projects/2024/RA/top200_genes.csv")
In [28]:
p9 <- DimPlot(Sobj, reduction = "umap", label = T)
p9
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
In [29]:
length(VariableFeatures(Sobj))
2000
In [30]:
Sobj <- ScaleData(Sobj, features = VariableFeatures(Sobj))
Centering and scaling data matrix
In [31]:
top_5
| p_val | avg_log2FC | pct.1 | pct.2 | p_val_adj | cluster | gene |
|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <fct> | <chr> |
| 0 | 1.0016561 | 0.736 | 0.366 | 0 | 0 | TCL1A |
| 0 | 0.7821481 | 0.902 | 0.591 | 0 | 0 | CD79A |
| 0 | 0.6451225 | 0.879 | 0.584 | 0 | 0 | MS4A1 |
| 0 | 0.7026985 | 0.711 | 0.454 | 0 | 0 | IGHM |
| 0 | 0.6302040 | 0.978 | 0.735 | 0 | 0 | HLA-DRA |
| 0 | 1.1027207 | 0.831 | 0.380 | 0 | 1 | TCL1A |
| 0 | 0.6825485 | 0.845 | 0.455 | 0 | 1 | IGHM |
| 0 | 0.9162647 | 0.977 | 0.603 | 0 | 1 | CD79A |
| 0 | 0.9630913 | 0.838 | 0.473 | 0 | 1 | CD79B |
| 0 | 0.7898648 | 0.955 | 0.595 | 0 | 1 | MS4A1 |
| 0 | 1.6510946 | 0.899 | 0.373 | 0 | 2 | TCL1A |
| 0 | 1.9512583 | 0.682 | 0.207 | 0 | 2 | PLD4 |
| 0 | 1.6193619 | 0.752 | 0.301 | 0 | 2 | VPREB3 |
| 0 | 0.9324660 | 0.882 | 0.452 | 0 | 2 | IGHM |
| 0 | 1.6630399 | 0.653 | 0.228 | 0 | 2 | ITM2C |
| 0 | 1.0423272 | 0.593 | 0.269 | 0 | 3 | BLK |
| 0 | 1.1999866 | 0.509 | 0.187 | 0 | 3 | ADAM28 |
| 0 | 0.6643063 | 0.739 | 0.423 | 0 | 3 | BANK1 |
| 0 | 0.6028050 | 0.840 | 0.531 | 0 | 3 | EZR |
| 0 | 2.5134700 | 0.360 | 0.062 | 0 | 3 | TNFRSF13B |
| 0 | 3.7162498 | 0.937 | 0.090 | 0 | 4 | CST3 |
| 0 | 5.0576359 | 0.921 | 0.092 | 0 | 4 | S100A9 |
| 0 | 4.6656172 | 0.895 | 0.074 | 0 | 4 | LYZ |
| 0 | 4.0048518 | 0.881 | 0.081 | 0 | 4 | TYROBP |
| 0 | 5.1504688 | 0.788 | 0.071 | 0 | 4 | S100A8 |
| 0 | 1.8673022 | 0.668 | 0.311 | 0 | 5 | CD83 |
| 0 | 1.5748154 | 0.859 | 0.527 | 0 | 5 | CD69 |
| 0 | 1.7217484 | 0.515 | 0.219 | 0 | 5 | NR4A2 |
| 0 | 0.9240831 | 0.569 | 0.298 | 0 | 5 | FOSB |
| 0 | 0.9043073 | 0.798 | 0.544 | 0 | 5 | CXCR4 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 0.000000e+00 | 2.909744 | 0.688 | 0.133 | 0.000000e+00 | 21 | IL7R |
| 0.000000e+00 | 2.864332 | 0.679 | 0.128 | 0.000000e+00 | 21 | CD3E |
| 0.000000e+00 | 4.325364 | 0.223 | 0.012 | 0.000000e+00 | 21 | LINC02446 |
| 9.221540e-276 | 3.933398 | 0.145 | 0.008 | 4.576835e-271 | 21 | LRRN3 |
| 2.193378e-274 | 3.456882 | 0.213 | 0.018 | 1.088617e-269 | 21 | PRKCQ-AS1 |
| 0.000000e+00 | 2.101964 | 0.606 | 0.052 | 0.000000e+00 | 22 | NRGN |
| 0.000000e+00 | 1.974742 | 0.518 | 0.028 | 0.000000e+00 | 22 | PPBP |
| 0.000000e+00 | 2.600920 | 0.389 | 0.015 | 0.000000e+00 | 22 | PF4 |
| 0.000000e+00 | 2.281540 | 0.308 | 0.013 | 0.000000e+00 | 22 | CAVIN2 |
| 0.000000e+00 | 1.644241 | 0.306 | 0.012 | 0.000000e+00 | 22 | TUBB1 |
| 0.000000e+00 | 4.938239 | 0.709 | 0.029 | 0.000000e+00 | 23 | GZMB |
| 0.000000e+00 | 7.734842 | 0.596 | 0.015 | 0.000000e+00 | 23 | LILRA4 |
| 0.000000e+00 | 4.790950 | 0.709 | 0.135 | 0.000000e+00 | 23 | UGCG |
| 0.000000e+00 | 4.408651 | 0.667 | 0.132 | 0.000000e+00 | 23 | APP |
| 0.000000e+00 | 6.810819 | 0.496 | 0.016 | 0.000000e+00 | 23 | SERPINF1 |
| 0.000000e+00 | 8.735087 | 0.777 | 0.003 | 0.000000e+00 | 24 | TYMS |
| 0.000000e+00 | 4.385809 | 0.893 | 0.138 | 0.000000e+00 | 24 | STMN1 |
| 0.000000e+00 | 7.379216 | 0.740 | 0.003 | 0.000000e+00 | 24 | MKI67 |
| 0.000000e+00 | 8.287288 | 0.704 | 0.003 | 0.000000e+00 | 24 | RRM2 |
| 0.000000e+00 | 3.342026 | 0.753 | 0.087 | 0.000000e+00 | 24 | SMC4 |
| 5.893382e-72 | 1.328529 | 0.534 | 0.156 | 2.925003e-67 | 25 | S100A9 |
| 4.291684e-44 | 1.368119 | 0.419 | 0.138 | 2.130048e-39 | 25 | LYZ |
| 5.870744e-44 | 2.291101 | 0.389 | 0.135 | 2.913768e-39 | 25 | IL7R |
| 4.202430e-34 | 1.073169 | 0.419 | 0.156 | 2.085750e-29 | 25 | CST3 |
| 1.024939e-32 | 3.810702 | 0.141 | 0.028 | 5.086979e-28 | 25 | NELL2 |
| 0.000000e+00 | 4.961341 | 0.643 | 0.006 | 0.000000e+00 | 26 | IGHV1-58 |
| 0.000000e+00 | 7.148758 | 0.400 | 0.002 | 0.000000e+00 | 26 | CXCL10 |
| 0.000000e+00 | 6.273329 | 0.300 | 0.002 | 0.000000e+00 | 26 | TRBV5-5 |
| 4.455427e-294 | 4.531879 | 0.543 | 0.015 | 2.211317e-289 | 26 | IGKV3D-20 |
| 2.430859e-207 | 5.237425 | 0.343 | 0.008 | 1.206484e-202 | 26 | TMEM176B |
In [32]:
tm <- top_5$gene[!duplicated(top_5$gene)]
length(tm)
length(unique(tm))
write.csv(top_5, "~/projects/2024/RA/top5_genes.csv")
write.csv(All.markers, "~/projects/2024/RA/allmarker_genes.csv")
p10 <- DotPlot(Sobj, features = tm, scale = F) + coord_flip() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
p10
86
86
In [33]:
B_m_list <- c("CD79A", "CD79B", "MS4A1", "CD19", "CD37", "JCHAIN", "MZB1", "SDC1", "MKI67", "TOP2A", "TUBB", "STMN1", "TYMS", "FCRL5", "FCRL3", "FCGR2B", "CD72", "IFITM1", "MX1", "IFI44L", "IRF7", "IL4R", "FCER2", "IGHD", "MME", "CD24", "CD38", "RAG1", "RAG2", "VPREB1", "IGLL1")
In [34]:
p11 <- DotPlot(Sobj, features = rev(B_m_list), scale = F) + coord_flip() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
p11
In [35]:
pdf("~/projects/2024/RA/Integrated_umap_result_050125.pdf", width = 8, height = 6)
p1
p2
p3
p4
p5
p6
p7
p8
p9
dev.off()
pdf("~/projects/2024/RA/Integrated_marker_dotplot_050125.pdf", width = 10, height = 10)
p10
p11
dev.off()
pdf: 2
pdf: 2
In [36]:
p12 <- FeaturePlot(Sobj, reduction = "umap", features = B_m_list, ncol = 3)
p12
pdf("~/projects/2024/RA/Integrated_marker_feature_050225.pdf", width = 12, height = 30)
p12
dev.off()
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
pdf: 2
In [37]:
FeaturePlot(Sobj, reduction = "umap", features = "IFI44L")
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
In [38]:
# b_1 <- c("CD24", "CD38", "MME", "CD19", "MS4A1", "IGHD", "CD27", "IGHM", "IGHG1", "FCRL5", "TBX21", "ITGAX", "CXCR3", "BCL6", "AICDA", "A4GALT", "CD83", "CD69", "CCR7", "PRDM1", "XBP1", "SDC1", "PAX5", "IGHE")
b_1 <- c("CD19", "MS4A1", "CD27", "IGHD", "IGHM", "CD38", "IGHA1", "IGHG1", "FCRL5", "TBX21", "ITGAX", "CXCR3", "BCL6", "AICDA", "MKI67", "MZB1", "PRDM1", "XBP1", "SDC1", "CD24", "CD1D", "CR2", "IL10", "CD5")
p13 <- FeaturePlot(Sobj, reduction = "umap", features = b_1, ncol = 3)
p13
pdf("~/projects/2024/RA/Integrated_marker_feature_1_050225.pdf", width = 12, height = 30)
p13
dev.off()
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
pdf: 2
In [39]:
print(b_1[!is.element(b_1,rownames(Sobj))])
b_1[duplicated(b_1)]
character(0)
In [40]:
p14 <- DotPlot(Sobj, features = rev(b_1), scale = F) + coord_flip() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
p14
pdf("~/projects/2024/RA/Integrated_marker_dotplot_1_050125.pdf", width = 10, height = 10)
p14
dev.off()
pdf: 2
In [85]:
B_subset_marker <- c("MYC", "EBF1", "PAX5", "DTX1", "BANK1", "CD55", "CD19", "CD93", "CD24", "CD38", "IGHM", "IGHD", "IGHG1", "FCER2", "CR2", "SELL", "CD5", "SPN", "CD27", "CD80", "NT5E", "PDCD1LG2", "BCL6", "AICDA", "MKI67", "CXCR4", "CD83", "CD86", "CD74", "PRDM1", "XBP1", "IRF4", "SDC1", "JCHAIN", "MZB1")
B_subset_marker[duplicated(B_subset_marker)]
b_2 <- B_subset_marker[is.element(B_subset_marker,rownames(Sobj))]
p15 <- DotPlot(Sobj, features = rev(b_2), scale = F) + coord_flip() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
p15
pdf("~/projects/2024/RA/Integrated_marker_dotplot_2_050125.pdf", width = 10, height = 10)
p15
dev.off()
pdf: 2
In [52]:
FeaturePlot(Sobj, reduction = "umap", features = c("CD3E", "CD8A", "MS4A1", "CD19", "CD79A"))
FeaturePlot(Sobj, reduction = "umap", features = c("LYZ", "CD14", "FCGR3A", "S100A8", "S100A9"))
FeaturePlot(Sobj, reduction = "umap", features = c("PF4"))
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
In [53]:
FeaturePlot(Sobj, reduction = "umap", features = c("TBX21", "ITGAX", "FCRL5", "CD27", "IGHD"))
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
In [56]:
- 11
- 9
- 4
- 21
- 7
- 6
- 8
- 17
- 0
- 20
- 12
- 19
- 25
- 18
- 24
- 3
- 15
- 2
- 1
- 23
- 14
- 5
- 16
- 13
- 22
- 26
- 10
Levels:
- '0'
- '1'
- '10'
- '11'
- '12'
- '13'
- '14'
- '15'
- '16'
- '17'
- '18'
- '19'
- '2'
- '20'
- '21'
- '22'
- '23'
- '24'
- '25'
- '26'
- '3'
- '4'
- '5'
- '6'
- '7'
- '8'
- '9'
In [145]:
Idents(Sobj) <- Sobj$`RNA_snn_res.1`
unique(Idents(Sobj))
rename_map <- c(
setNames(rep("Naive B", 5), c(0, 1, 2, 5, 22)),
setNames(rep("Atypical memory B", 1), c(10)),
setNames(rep("CD27+ memory B", 2), c(3, 13)),
setNames(rep("INF-induced memory B", 1), c(14)),
setNames(rep("Activated B", 3), c(7, 11, 22)),
setNames(rep("Unswitched memory B", 1), c(16)),
setNames(rep("T cell like B", 5), c(6, 8, 9, 25, 21)),
setNames(rep("Myeloid like B", 7), c(4, 12, 17, 18, 20, 23, 26)),
setNames(rep("Plasma", 1), c(15)),
setNames(rep("Plasmablast", 1), c(24)),
setNames(rep("Megakaryocyte like B", 1), c(19))
)
# Rename
Sobj <- RenameIdents(Sobj, rename_map)
# Idents(Sobj) <- factor(Idents(Sobj), levels = c("Naive B", "T cell like B", "CD27+ memory B", "Myeloid like B", "Activated B", "Atypical memory B", "INF-induced memory B", "Plasma", "Unswitched memory B", "Megakaryocyte like B", "Plasmablast"))
Idents(Sobj) <- factor(Idents(Sobj), levels = c("Naive B", "Activated B", "Unswitched memory B", "CD27+ memory B", "Atypical memory B", "INF-induced memory B", "Plasmablast", "Plasma", "T cell like B", "Myeloid like B", "Megakaryocyte like B"))
Sobj$anno_text <- Idents(Sobj)
unique(Idents(Sobj))
head(Idents(Sobj))
- 11
- 9
- 4
- 21
- 7
- 6
- 8
- 17
- 0
- 20
- 12
- 19
- 25
- 18
- 24
- 3
- 15
- 2
- 1
- 23
- 14
- 5
- 16
- 13
- 22
- 26
- 10
Levels:
- '0'
- '1'
- '10'
- '11'
- '12'
- '13'
- '14'
- '15'
- '16'
- '17'
- '18'
- '19'
- '2'
- '20'
- '21'
- '22'
- '23'
- '24'
- '25'
- '26'
- '3'
- '4'
- '5'
- '6'
- '7'
- '8'
- '9'
- Activated B
- T cell like B
- Myeloid like B
- Naive B
- Megakaryocyte like B
- Plasmablast
- CD27+ memory B
- Plasma
- INF-induced memory B
- Unswitched memory B
- Atypical memory B
Levels:
- 'Naive B'
- 'Activated B'
- 'Unswitched memory B'
- 'CD27+ memory B'
- 'Atypical memory B'
- 'INF-induced memory B'
- 'Plasmablast'
- 'Plasma'
- 'T cell like B'
- 'Myeloid like B'
- 'Megakaryocyte like B'
- GSM8207627_AAACCTGAGACCTTTG-1
- Activated B
- GSM8207627_AAACCTGAGATCCTGT-1
- T cell like B
- GSM8207627_AAACCTGAGATGTAAC-1
- Myeloid like B
- GSM8207627_AAACCTGAGCACCGTC-1
- Activated B
- GSM8207627_AAACCTGAGGCGATAC-1
- Myeloid like B
- GSM8207627_AAACCTGAGTCAATAG-1
- Myeloid like B
Levels:
- 'Naive B'
- 'Activated B'
- 'Unswitched memory B'
- 'CD27+ memory B'
- 'Atypical memory B'
- 'INF-induced memory B'
- 'Plasmablast'
- 'Plasma'
- 'T cell like B'
- 'Myeloid like B'
- 'Megakaryocyte like B'
In [146]:
sort(table(Idents(Sobj)))
Plasmablast Megakaryocyte like B Unswitched memory B
466 918 1536
Plasma INF-induced memory B Atypical memory B
1708 1846 4289
Activated B Myeloid like B CD27+ memory B
9574 16127 16358
T cell like B Naive B
17892 62675
In [148]:
table(Idents(Sobj))
pp1<- DimPlot(Sobj, reduction = "umap", label = T, repel = T)
pp1
Naive B Activated B Unswitched memory B
62675 9574 1536
CD27+ memory B Atypical memory B INF-induced memory B
16358 4289 1846
Plasmablast Plasma T cell like B
466 1708 17892
Myeloid like B Megakaryocyte like B
16127 918
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
In [69]:
FeaturePlot(Sobj, reduction = "umap", features = c("CD27"))
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
In [149]:
options(repr.plot.width=8, repr.plot.height=16)
tm <- c(B_subset_marker, b_1, B_m_list)
tm <- tm[!duplicated(tm)]
p16 <- DotPlot(Sobj, features = rev(tm), scale = F) +
coord_flip() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
p16
options(repr.plot.width=10, repr.plot.height=8)
In [150]:
options(repr.plot.width=8, repr.plot.height=16)
tm <- c(B_subset_marker, b_1, B_m_list)
tm <- tm[!duplicated(tm)]
p16 <- DotPlot(Sobj, features = rev(tm), scale = T) +
scale_color_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
coord_flip() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
p16
options(repr.plot.width=10, repr.plot.height=8)
Scale for colour is already present. Adding another scale for colour, which will replace the existing scale.
In [151]:
# mm <- c("CD19", "MS4A1", "CD22", "IGHD", "TCL1A", "IL4R", "FCMR", "CCR7", "SELL", "IGHM", "CD3D", "CD3E", "CD3G", "CD8A", "CD8B", "TCF7", "LEF1", "CD7", "CD2", "GZMA", "GZMK", "IGHA1", "IGHA2", "IGHG1", "IGHG2", "IGHG3", "IGHG4", "TNFRSF13B", "FCRL3", "FCRL5", "LYZ", "S100A8", "S100A9", "S100A12", "CD14", "FCN1", "VCAN", "TYROBP", "CLEC12A", "CD83", "CD69", "CD86", "IRF4", "BATF", "TBX21", "ITGAX", "CXCR3", "SIGLEC6", "CR2", "ZEB2", "CXCR5", "IFI6", "IFITM1", "IFITM2", "IFITM3", "ISG15", "MX1", "STAT1", "IRF7", "OAS1", "IFI44L", "RSAD2", "MZB1", "DERL3", "FKBP11", "CD27", "PF4", "PPBP", "GP1BB", "TUBB1", "SPARC", "NRGN", "CD38", "XBP1", "PRDM1", "IRF4", "JCHAIN", "SDC1")
mm <- c("CD19", "MS4A1", "CD22", "IGHD", "TCL1A", "IL4R", "FCMR", "CCR7", "SELL", "IGHM", "CD27", "CD83", "CD69", "CD86", "IRF4", "BATF","IGHA1", "IGHA2", "IGHG1", "IGHG2", "IGHG3", "IGHG4", "TNFRSF13B", "FCRL3", "FCRL5","TBX21", "ITGAX", "CXCR3", "SIGLEC6", "CR2", "ZEB2", "CXCR5","IFI6", "IFITM1", "IFITM2", "IFITM3", "ISG15", "MX1", "STAT1", "IRF7", "OAS1", "IFI44L", "RSAD2","CD38", "XBP1", "PRDM1", "IRF4", "JCHAIN", "SDC1","MZB1", "DERL3", "FKBP11","CD3D", "CD3E", "CD3G", "CD8A", "CD8B", "TCF7", "LEF1", "CD7", "CD2", "GZMA", "GZMK","LYZ", "S100A8", "S100A9", "S100A12", "CD14", "FCN1", "VCAN", "TYROBP", "CLEC12A","PF4", "PPBP", "GP1BB", "TUBB1", "SPARC", "NRGN")
options(repr.plot.width=8, repr.plot.height=16)
mm <- mm[!duplicated(mm)]
p17 <- DotPlot(Sobj, features = rev(mm), scale = F) +
coord_flip() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1), axis.title.x = element_blank(), plot.margin = margin(10, 10, 20, 10))
p17
options(repr.plot.width=10, repr.plot.height=8)
In [152]:
options(repr.plot.width=8, repr.plot.height=16)
p18 <- DotPlot(Sobj, features = rev(mm), scale = T) +
scale_color_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
coord_flip() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1), axis.title.x = element_blank(), plot.margin = margin(10, 10, 20, 10))
p18
options(repr.plot.width=10, repr.plot.height=8)
Scale for colour is already present. Adding another scale for colour, which will replace the existing scale.
In [157]:
pdf("~/projects/2024/RA/dotplot_1_050625.pdf", width = 8, height = 16)
p17
# p18
dev.off()
pdf: 2
In [154]:
FeaturePlot(Sobj, features = "BCL6", reduction = "umap")
Sobj$temp <- Sobj@assays$RNA$counts["BCL6",]>0
DimPlot(Sobj, group.by = "temp", reduction = "umap")
Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE` Rasterizing points since number of points exceeds 100,000. To disable this behavior set `raster=FALSE`
In [155]:
p15_1 <- DotPlot(Sobj, features = rev(b_2), scale = F) + coord_flip() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
p15_1
pdf("~/projects/2024/RA/Integrated_marker_dotplot_SY_050625.pdf", width = 6, height = 12)
p15_1
dev.off()
pdf: 2
In [159]:
pdf("~/projects/2024/RA/UMAP_new_anno_050625.pdf", width = 7, height = 4.5)
# p1
# p2
# p3
# p4
# p5
p9
pp1
dev.off()
pdf: 2
In [161]:
'~/projects/2024/RA/GSE136035_RAW/'
In [ ]:
if (!exist("GSE263932_meta")) {
GSE263932_meta <- readRDS(paste0("~/projects/2024/RA/GSE263932/GSE263932_meta.rds"))
}
if (!exist("GSE163121_meta")) {
GSE163121_meta <- readRDS(paste0("~/projects/2024/RA/GSE163121_RAW/out/GSE163121_meta.rds"))
}
if (!exist("GSE136035_meta")) {
GSE136035_meta <- readRDS(paste0("~/projects/2024/RA/GSE136035_RAW/GSE136035_meta.rds"))
}
In [ ]:
In [ ]:
In [ ]: